{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examples and Exercises from Think Stats, 2nd Edition\n",
    "\n",
    "http://thinkstats2.com\n",
    "\n",
    "Copyright 2016 Allen B. Downey\n",
    "\n",
    "MIT License: https://opensource.org/licenses/MIT\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "\n",
    "import nsfg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Examples from Chapter 1\n",
    "\n",
    "Read NSFG data into a Pandas DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>pregordr</th>\n",
       "      <th>howpreg_n</th>\n",
       "      <th>howpreg_p</th>\n",
       "      <th>moscurrp</th>\n",
       "      <th>nowprgdk</th>\n",
       "      <th>pregend1</th>\n",
       "      <th>pregend2</th>\n",
       "      <th>nbrnaliv</th>\n",
       "      <th>multbrth</th>\n",
       "      <th>...</th>\n",
       "      <th>laborfor_i</th>\n",
       "      <th>religion_i</th>\n",
       "      <th>metro_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_p</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>totalwgt_lb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3410.389399</td>\n",
       "      <td>3869.349602</td>\n",
       "      <td>6448.271112</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.8125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3410.389399</td>\n",
       "      <td>3869.349602</td>\n",
       "      <td>6448.271112</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.8750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.1875</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 244 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  pregordr  howpreg_n  howpreg_p  moscurrp  nowprgdk  pregend1  \\\n",
       "0       1         1        NaN        NaN       NaN       NaN       6.0   \n",
       "1       1         2        NaN        NaN       NaN       NaN       6.0   \n",
       "2       2         1        NaN        NaN       NaN       NaN       5.0   \n",
       "3       2         2        NaN        NaN       NaN       NaN       6.0   \n",
       "4       2         3        NaN        NaN       NaN       NaN       6.0   \n",
       "\n",
       "   pregend2  nbrnaliv  multbrth     ...       laborfor_i  religion_i  metro_i  \\\n",
       "0       NaN       1.0       NaN     ...                0           0        0   \n",
       "1       NaN       1.0       NaN     ...                0           0        0   \n",
       "2       NaN       3.0       5.0     ...                0           0        0   \n",
       "3       NaN       1.0       NaN     ...                0           0        0   \n",
       "4       NaN       1.0       NaN     ...                0           0        0   \n",
       "\n",
       "       basewgt  adj_mod_basewgt      finalwgt  secu_p  sest  cmintvw  \\\n",
       "0  3410.389399      3869.349602   6448.271112       2     9      NaN   \n",
       "1  3410.389399      3869.349602   6448.271112       2     9      NaN   \n",
       "2  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "3  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "4  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "\n",
       "   totalwgt_lb  \n",
       "0       8.8125  \n",
       "1       7.8750  \n",
       "2       9.1250  \n",
       "3       7.0000  \n",
       "4       6.1875  \n",
       "\n",
       "[5 rows x 244 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg = nsfg.ReadFemPreg()\n",
    "preg.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print the column names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['caseid', 'pregordr', 'howpreg_n', 'howpreg_p', 'moscurrp', 'nowprgdk',\n",
       "       'pregend1', 'pregend2', 'nbrnaliv', 'multbrth',\n",
       "       ...\n",
       "       'laborfor_i', 'religion_i', 'metro_i', 'basewgt', 'adj_mod_basewgt',\n",
       "       'finalwgt', 'secu_p', 'sest', 'cmintvw', 'totalwgt_lb'],\n",
       "      dtype='object', length=244)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a single column name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'pregordr'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.columns[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a column and check what type it is."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr = preg['pregordr']\n",
    "type(pregordr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "1        2\n",
       "2        1\n",
       "3        2\n",
       "4        3\n",
       "5        1\n",
       "6        2\n",
       "7        3\n",
       "8        1\n",
       "9        2\n",
       "10       1\n",
       "11       1\n",
       "12       2\n",
       "13       3\n",
       "14       1\n",
       "15       2\n",
       "16       3\n",
       "17       1\n",
       "18       2\n",
       "19       1\n",
       "20       2\n",
       "21       1\n",
       "22       2\n",
       "23       1\n",
       "24       2\n",
       "25       3\n",
       "26       1\n",
       "27       1\n",
       "28       2\n",
       "29       3\n",
       "        ..\n",
       "13563    2\n",
       "13564    3\n",
       "13565    1\n",
       "13566    1\n",
       "13567    1\n",
       "13568    2\n",
       "13569    1\n",
       "13570    2\n",
       "13571    3\n",
       "13572    4\n",
       "13573    1\n",
       "13574    2\n",
       "13575    1\n",
       "13576    1\n",
       "13577    2\n",
       "13578    1\n",
       "13579    2\n",
       "13580    1\n",
       "13581    2\n",
       "13582    3\n",
       "13583    1\n",
       "13584    2\n",
       "13585    1\n",
       "13586    2\n",
       "13587    3\n",
       "13588    1\n",
       "13589    2\n",
       "13590    3\n",
       "13591    4\n",
       "13592    5\n",
       "Name: pregordr, Length: 13593, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a single element from a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a slice from a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    1\n",
       "3    2\n",
       "4    3\n",
       "Name: pregordr, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr[2:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a column using dot notation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "pregordr = preg.pregordr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Count the number of times each value occurs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    9148\n",
       "2    1862\n",
       "3     120\n",
       "4    1921\n",
       "5     190\n",
       "6     352\n",
       "Name: outcome, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.outcome.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check the values of another variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0        8\n",
       "1.0       40\n",
       "2.0       53\n",
       "3.0       98\n",
       "4.0      229\n",
       "5.0      697\n",
       "6.0     2223\n",
       "7.0     3049\n",
       "8.0     1889\n",
       "9.0      623\n",
       "10.0     132\n",
       "11.0      26\n",
       "12.0      10\n",
       "13.0       3\n",
       "14.0       3\n",
       "15.0       1\n",
       "Name: birthwgt_lb, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.birthwgt_lb.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make a dictionary that maps from each respondent's `caseid` to a list of indices into the pregnancy `DataFrame`.  Use it to select the pregnancy outcomes for a single respondent."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 4, 4, 4, 4, 4, 1])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caseid = 10229\n",
    "preg_map = nsfg.MakePregMap(preg)\n",
    "indices = preg_map[caseid]\n",
    "preg.outcome[indices].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `birthord` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611933)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0     4413\n",
       "2.0     2874\n",
       "3.0     1234\n",
       "4.0      421\n",
       "5.0      126\n",
       "6.0       50\n",
       "7.0       20\n",
       "8.0        7\n",
       "9.0        2\n",
       "10.0       1\n",
       "Name: birthord, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "preg.birthord.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also use `isnull` to count the number of nans."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4445"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.birthord.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `prglngth` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611931)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       15\n",
       "1        9\n",
       "2       78\n",
       "3      151\n",
       "4      412\n",
       "5      181\n",
       "6      543\n",
       "7      175\n",
       "8      409\n",
       "9      594\n",
       "10     137\n",
       "11     202\n",
       "12     170\n",
       "13     446\n",
       "14      29\n",
       "15      39\n",
       "16      44\n",
       "17     253\n",
       "18      17\n",
       "19      34\n",
       "20      18\n",
       "21      37\n",
       "22     147\n",
       "23      12\n",
       "24      31\n",
       "25      15\n",
       "26     117\n",
       "27       8\n",
       "28      38\n",
       "29      23\n",
       "30     198\n",
       "31      29\n",
       "32     122\n",
       "33      50\n",
       "34      60\n",
       "35     357\n",
       "36     329\n",
       "37     457\n",
       "38     609\n",
       "39    4744\n",
       "40    1120\n",
       "41     591\n",
       "42     328\n",
       "43     148\n",
       "44      46\n",
       "45      10\n",
       "46       1\n",
       "47       1\n",
       "48       7\n",
       "50       2\n",
       "Name: prglngth, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "preg.prglngth.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To compute the mean of a column, you can invoke the `mean` method on a Series.  For example, here is the mean birthweight in pounds:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7.265628457623368"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.totalwgt_lb.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a new column named <tt>totalwgt_kg</tt> that contains birth weight in kilograms.  Compute its mean.  Remember that when you create a new column, you have to use dictionary syntax, not dot notation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.302558389828807"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "preg['totalwgt_kg'] = preg.totalwgt_lb / 2.2\n",
    "preg.totalwgt_kg.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`nsfg.py` also provides `ReadFemResp`, which reads the female respondents file and returns a `DataFrame`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "resp = nsfg.ReadFemResp()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`DataFrame` provides a method `head` that displays the first five rows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>rscrinf</th>\n",
       "      <th>rdormres</th>\n",
       "      <th>rostscrn</th>\n",
       "      <th>rscreenhisp</th>\n",
       "      <th>rscreenrace</th>\n",
       "      <th>age_a</th>\n",
       "      <th>age_r</th>\n",
       "      <th>cmbirth</th>\n",
       "      <th>agescrn</th>\n",
       "      <th>...</th>\n",
       "      <th>pubassis_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_r</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>cmlstyr</th>\n",
       "      <th>screentime</th>\n",
       "      <th>intvlngth</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>27</td>\n",
       "      <td>27</td>\n",
       "      <td>902</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:26:36</td>\n",
       "      <td>110.492667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5012</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>42</td>\n",
       "      <td>42</td>\n",
       "      <td>718</td>\n",
       "      <td>42</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2335.279149</td>\n",
       "      <td>2846.799490</td>\n",
       "      <td>4744.191350</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1233</td>\n",
       "      <td>1221</td>\n",
       "      <td>16:30:59</td>\n",
       "      <td>64.294000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11586</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>43</td>\n",
       "      <td>43</td>\n",
       "      <td>708</td>\n",
       "      <td>43</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2335.279149</td>\n",
       "      <td>2846.799490</td>\n",
       "      <td>4744.191350</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:19:09</td>\n",
       "      <td>75.149167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6794</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>1042</td>\n",
       "      <td>15</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3783.152221</td>\n",
       "      <td>5071.464231</td>\n",
       "      <td>5923.977368</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>15:54:43</td>\n",
       "      <td>28.642833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>616</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>991</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>5341.329968</td>\n",
       "      <td>6437.335772</td>\n",
       "      <td>7229.128072</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1233</td>\n",
       "      <td>1221</td>\n",
       "      <td>14:19:44</td>\n",
       "      <td>69.502667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 3087 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \\\n",
       "0    2298        1         5         5            1          5.0     27   \n",
       "1    5012        1         5         1            5          5.0     42   \n",
       "2   11586        1         5         1            5          5.0     43   \n",
       "3    6794        5         5         4            1          5.0     15   \n",
       "4     616        1         5         4            1          5.0     20   \n",
       "\n",
       "   age_r  cmbirth  agescrn     ...      pubassis_i      basewgt  \\\n",
       "0     27      902       27     ...               0  3247.916977   \n",
       "1     42      718       42     ...               0  2335.279149   \n",
       "2     43      708       43     ...               0  2335.279149   \n",
       "3     15     1042       15     ...               0  3783.152221   \n",
       "4     20      991       20     ...               0  5341.329968   \n",
       "\n",
       "   adj_mod_basewgt     finalwgt  secu_r  sest  cmintvw  cmlstyr  screentime  \\\n",
       "0      5123.759559  5556.717241       2    18     1234     1222    18:26:36   \n",
       "1      2846.799490  4744.191350       2    18     1233     1221    16:30:59   \n",
       "2      2846.799490  4744.191350       2    18     1234     1222    18:19:09   \n",
       "3      5071.464231  5923.977368       2    18     1234     1222    15:54:43   \n",
       "4      6437.335772  7229.128072       2    18     1233     1221    14:19:44   \n",
       "\n",
       "    intvlngth  \n",
       "0  110.492667  \n",
       "1   64.294000  \n",
       "2   75.149167  \n",
       "3   28.642833  \n",
       "4   69.502667  \n",
       "\n",
       "[5 rows x 3087 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `age_r` column from `resp` and print the value counts.  How old are the youngest and oldest respondents?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15    217\n",
       "16    223\n",
       "17    234\n",
       "18    235\n",
       "19    241\n",
       "20    258\n",
       "21    267\n",
       "22    287\n",
       "23    282\n",
       "24    269\n",
       "25    267\n",
       "26    260\n",
       "27    255\n",
       "28    252\n",
       "29    262\n",
       "30    292\n",
       "31    278\n",
       "32    273\n",
       "33    257\n",
       "34    255\n",
       "35    262\n",
       "36    266\n",
       "37    271\n",
       "38    256\n",
       "39    215\n",
       "40    256\n",
       "41    250\n",
       "42    215\n",
       "43    253\n",
       "44    235\n",
       "Name: age_r, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "resp.age_r.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can use the `caseid` to match up rows from `resp` and `preg`.  For example, we can select the row from `resp` for `caseid` 2298 like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>rscrinf</th>\n",
       "      <th>rdormres</th>\n",
       "      <th>rostscrn</th>\n",
       "      <th>rscreenhisp</th>\n",
       "      <th>rscreenrace</th>\n",
       "      <th>age_a</th>\n",
       "      <th>age_r</th>\n",
       "      <th>cmbirth</th>\n",
       "      <th>agescrn</th>\n",
       "      <th>...</th>\n",
       "      <th>pubassis_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_r</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>cmlstyr</th>\n",
       "      <th>screentime</th>\n",
       "      <th>intvlngth</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>27</td>\n",
       "      <td>27</td>\n",
       "      <td>902</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:26:36</td>\n",
       "      <td>110.492667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 3087 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \\\n",
       "0    2298        1         5         5            1          5.0     27   \n",
       "\n",
       "   age_r  cmbirth  agescrn     ...      pubassis_i      basewgt  \\\n",
       "0     27      902       27     ...               0  3247.916977   \n",
       "\n",
       "   adj_mod_basewgt     finalwgt  secu_r  sest  cmintvw  cmlstyr  screentime  \\\n",
       "0      5123.759559  5556.717241       2    18     1234     1222    18:26:36   \n",
       "\n",
       "    intvlngth  \n",
       "0  110.492667  \n",
       "\n",
       "[1 rows x 3087 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp[resp.caseid==2298]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And we can get the corresponding rows from `preg` like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>pregordr</th>\n",
       "      <th>howpreg_n</th>\n",
       "      <th>howpreg_p</th>\n",
       "      <th>moscurrp</th>\n",
       "      <th>nowprgdk</th>\n",
       "      <th>pregend1</th>\n",
       "      <th>pregend2</th>\n",
       "      <th>nbrnaliv</th>\n",
       "      <th>multbrth</th>\n",
       "      <th>...</th>\n",
       "      <th>religion_i</th>\n",
       "      <th>metro_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_p</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>totalwgt_lb</th>\n",
       "      <th>totalwgt_kg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2610</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.8750</td>\n",
       "      <td>3.125000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2611</th>\n",
       "      <td>2298</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.5000</td>\n",
       "      <td>2.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2612</th>\n",
       "      <td>2298</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.1875</td>\n",
       "      <td>1.903409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2613</th>\n",
       "      <td>2298</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.8750</td>\n",
       "      <td>3.125000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 245 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      caseid  pregordr  howpreg_n  howpreg_p  moscurrp  nowprgdk  pregend1  \\\n",
       "2610    2298         1        NaN        NaN       NaN       NaN       6.0   \n",
       "2611    2298         2        NaN        NaN       NaN       NaN       6.0   \n",
       "2612    2298         3        NaN        NaN       NaN       NaN       6.0   \n",
       "2613    2298         4        NaN        NaN       NaN       NaN       6.0   \n",
       "\n",
       "      pregend2  nbrnaliv  multbrth     ...       religion_i  metro_i  \\\n",
       "2610       NaN       1.0       NaN     ...                0        0   \n",
       "2611       NaN       1.0       NaN     ...                0        0   \n",
       "2612       NaN       1.0       NaN     ...                0        0   \n",
       "2613       NaN       1.0       NaN     ...                0        0   \n",
       "\n",
       "          basewgt  adj_mod_basewgt     finalwgt  secu_p  sest  cmintvw  \\\n",
       "2610  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "2611  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "2612  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "2613  3247.916977      5123.759559  5556.717241       2    18      NaN   \n",
       "\n",
       "      totalwgt_lb  totalwgt_kg  \n",
       "2610       6.8750     3.125000  \n",
       "2611       5.5000     2.500000  \n",
       "2612       4.1875     1.903409  \n",
       "2613       6.8750     3.125000  \n",
       "\n",
       "[4 rows x 245 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg[preg.caseid==2298]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How old is the respondent with `caseid` 1?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1069    44\n",
       "Name: age_r, dtype: int64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "resp[resp.caseid==1].age_r"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What are the pregnancy lengths for the respondent with `caseid` 2298?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2610    40\n",
       "2611    36\n",
       "2612    30\n",
       "2613    40\n",
       "Name: prglngth, dtype: int64"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "preg[preg.caseid==2298].prglngth"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What was the birthweight of the first baby born to the respondent with `caseid` 5012?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5515    6.0\n",
       "Name: birthwgt_lb, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Solution\n",
    "\n",
    "preg[preg.caseid==5012].birthwgt_lb"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
